rm(list = ls())

library(tidyverse)
library(stargazer)
library(lfe)
library(broom) #for tidy
library(lmtest) # For coeftest()
library(sandwich) # For sandwich()
library(estimatr)
library(texreg)
library(psych)
library(default)
library(sjmisc) #for row_sums
library(quantreg)

options(tibble.print_min = 10)
source("functions.R")

load(file = 'data_output/persons_years_sample.Rdata')


###########################################################################
# Time trends -------------------------------------------------------------
###########################################################################

my_quantile = 3 


# COG - EXTRO -------------------------------------------------------------


plot_dta = full_sample %>% 
  filter(!is.na(extro), !is.na(cog)) %>% 
  group_by(vuosi, tercileExtro, tercileCog) %>% 
  summarise(Earnings = median(inc_labor, na.rm = T),
            n = n()) %>% 
  group_by(vuosi) %>% 
  mutate(frac = n/sum(n)) %>% ungroup %>% 
  mutate(g = Earnings/Earnings[vuosi == 2001])

plot_dta = plot_dta %>% 
  filter(tercileExtro != 2, tercileCog != 2) %>% 
  unite(bundle, tercileExtro, tercileCog, remove = F) %>% 
  mutate(bundle = factor(bundle, levels = c("3_1", "1_1", "3_3", "1_3")))



# Earnings growth bars plot
plot_dta %>% 
  filter(vuosi == 2015) %>% 
  ggplot(aes(bundle, g-1)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.6), width = 0.3, fill = "steelblue") +
  labs(x = "Terciles of Extraversion and Cognitive Skills",
       y = "Earnings Growth 2001 - 2015",
       fill = "Year") +
  theme_bw() +
  theme(legend.position = "right") +
  scale_x_discrete(labels = c("1_1" ="Low Extra\n Low Cognitive", "1_3" = "Low Extra\n High Cognitive", 
                              "3_1" = "High Extra\n Low Cognitive", "3_3" = "High Extra\n High Cognitive"))

ggsave("/figures/trends_bars_cog.pdf", height = 4, width = 7)


# Bundle proportions time series

plot_dta %>% 
  mutate(bundle = factor(bundle, levels = c("1_1", "3_3", "1_3", "3_1"))) %>% 
  ggplot(aes(vuosi, frac)) + 
  geom_line(aes(color = bundle, linetype = bundle), linewidth = 1) +
  geom_point(aes(color = bundle, shape = bundle), size = 3) +
  labs(x = "Year",
       y = "Proportion") +
  theme_bw() +
  scale_color_manual(name = "Terciles", labels = var_names, values = color_types) +
  scale_fill_manual(name = "Terciles", labels = var_names, values = color_types) +
  scale_shape_manual(name = "Terciles", labels = var_names, values = point_types) +
  scale_linetype_manual(name = "Terciles", labels = var_names, values = line_types) +
  labs(x = "Year",
       y = "Share") +
  scale_y_continuous(breaks = pretty) +
  scale_x_continuous(breaks = 2000:2015, labels = paste0("'", str_sub(2000:2015, 3, 4)))

ggsave("/figures/trends_bundles_cog.pdf", height = 4, width = 7)


# Average earnings evolutions between separating and pooling bundles

plot_dta = full_sample %>% 
  filter(!is.na(extro), !is.na(college)) %>% 
  group_by(vuosi, tercileExtro, college) %>% 
  summarise(Earnings = median(inc_labor, na.rm = T),
            n = n()) %>% 
  group_by(vuosi) %>% 
  mutate(frac = n/sum(n)) %>% ungroup %>% 
  mutate(g = Earnings/Earnings[vuosi == 2001])

plot_dta = plot_dta %>% 
  filter(tercileExtro != 2, college != 2) %>% 
  unite(bundle, tercileExtro, college, remove = F) %>% 
  mutate(bundle = factor(bundle, levels = c("3_FALSE", "1_FALSE", "3_TRUE", "1_TRUE")))



# Earnings growth bars plot
plot_dta %>% 
  filter(vuosi == 2015) %>% 
  ggplot(aes(bundle, g-1)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.6), width = 0.3, fill = "steelblue") +
  labs(x = "Terciles of Extraversion and College Status",
       y = "Earnings Growth 2001 - 2015",
       fill = "Year") +
  theme_bw() +
  theme(legend.position = "right") +
  scale_x_discrete(labels = c("1_FALSE" = "Low Extra\n No College", "1_TRUE" = "Low Extra\n College", 
                              "3_FALSE" = "High Extra\n No College", "3_TRUE" = "High Extra\n College"))

ggsave("/figures/trends_bars_college.pdf", height = 4, width = 7)



# Extra stuff -------------------------------------------------------------


# load("./data_output/persons_years.Rdata")
# 
# persons = dta %>% 
#   arrange(shnro, -vuosi) %>% 
#   group_by(shnro) %>% 
#   filter(row_number() == 1) %>% ungroup
# 


# 
# # Average earnings evolutions between separating and pooling bundles
# 
# 
# plot_dta = full_sample %>% # %>% filter(inc_labor!=0) %>% 
#   filter(!is.na(extro), !is.na(cog)) %>% 
#   group_by(tercileExtro, college) %>% 
#   mutate(log_growth = inc_log - median(inc_log[vuosi == 2001], na.rm = T),
#          inc_growth = inc_labor/median(inc_labor[vuosi == 2001], na.rm = T)) %>% ungroup
# 
# p = plot_dta %>% 
#   group_by(vuosi, tercileExtro, college) %>% 
#   summarise(Earnings = median(inc_labor, na.rm = T),
#             g = median(inc_growth, na.rm = T),
#             n = n()) %>% 
#   group_by(vuosi) %>% 
#   mutate(frac = n/sum(n)) %>% ungroup
# 
# # Yksinkertainen nelikenttä
# 
# plot_dta %>% 
#   group_by(tercileExtro, college) %>% 
#   summarise(Earnings = median(inc_labor, na.rm = T),
#             n = n()) %>% 
#   ungroup %>% 
#   mutate(frac = n/sum(n)) 
# 
# 
# # Earnings growth bars plot
# p %>% 
#   unite(bundle, tercileExtro, college, sep = "", remove = F) %>% 
#   filter(vuosi == 2015,
#          tercileExtro != 2) %>% 
#   ggplot(aes(bundle, g-1)) +
#   geom_bar(stat = "identity", position = position_dodge(width = 0.6), width = 0.5) +
#   labs(x = "Bundle",
#        y = "Earnings Growth 2001 - 2015",
#        fill = "Year") +
#   scale_fill_brewer(palette = "Paired") +
#   theme_bw() +
#   theme(legend.position = "right") +
#   scale_x_discrete(labels = c("Low Extra\n No College", "Low Extra\n Some College", "High Extra\n No College", "High Extra\n Some College"))
# 
# ggsave("/figures/trends_bars_college.pdf", height = 4, width = 7)
# 
# # Bundle proportions bar plot
# p %>% 
#   unite(bundle, tercileExtro, college, sep = "", remove = F) %>% 
#   filter(vuosi %in% c(2001, 2015),
#          tercileExtro != 2) %>% 
#   ggplot(aes(bundle, frac, fill = as.factor(vuosi))) +
#   geom_bar(stat = "identity", position = position_dodge(width = 0.6), width = 0.5) +
#   labs(x = "Bundle",
#        y = "FrExtra",
#        fill = "Year") +
#   scale_fill_brewer(palette = "Paired") +
#   theme_bw() +
#   theme(legend.position = "right") +
#   scale_x_discrete(labels = c("Low Extra\n No College", "Low Extra\n Some College", "High Extra\n No College", "High Extra\n Some College"))
# 
# ggsave("/figures/trends_bundles_bars_college.pdf", height = 4, width = 8)
# 
# 
# # Bundle proportions time series
# p %>% 
#   ggplot(aes(vuosi, frac)) + 
#   stat_summary(data = filter(p, college == T, tercileExtro == 1), 
#                aes(color = "Low Extra, Some College"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, college == F, tercileExtro == my_quantile), 
#                aes(color = "High Extra, No College"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, college == F, tercileExtro == 1), 
#                aes(color = "Both low"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, college == T, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, college == T, tercileExtro == 1), 
#                aes(color = "Low Extra, Some College"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, college == F, tercileExtro == my_quantile), 
#                aes(color = "High Extra, No College"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, college == F, tercileExtro == 1), 
#                aes(color = "Both low"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, college == T, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = "sum", geom = "point") +
#   theme_bw() +
#   labs(x = "Year",
#        y = "Proportion") +
#   scale_y_continuous(breaks = pretty) +
#   scale_color_discrete(name = "Bundle")
# 
# ggsave("/figures/trends_bundles_college.pdf", height = 5, width = 7)
# 
# # College attendance time series
# p %>% 
#   ggplot(aes(vuosi, frac)) + 
#   stat_summary(data = filter(p, college ==T), 
#                aes(color = "College"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, tercileExtro == my_quantile), 
#                aes(color = "Low Extra"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, college ==T), 
#                aes(color = "College"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, tercileExtro == my_quantile), 
#                aes(color = "Low Extra"), fun = "sum", geom = "line") +
#   theme_bw() +
#   labs(x = "Year",
#        y = "Proportion") +
#   scale_y_continuous(breaks = pretty) +
#   scale_color_discrete(name = "Bundle")
# 
# 
# # Earnings growth time series
# my_func = "median"
# plot_dta %>% 
#   ggplot(aes(vuosi, inc_growth)) + 
#   stat_summary(data = filter(plot_dta, college == F, tercileExtro == 1), 
#                aes(color = "Both low"), fun = my_func, geom = "line") +
#   stat_summary(data = filter(plot_dta, college == T, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = my_func, geom = "line") +
#   stat_summary(data = filter(plot_dta, college == F, tercileExtro == my_quantile), 
#                aes(color = "High Extra, No College"), fun = my_func, geom = "line") +
#   stat_summary(data = filter(plot_dta, college == T, tercileExtro == 1), 
#                aes(color = "Low Extra, Some College"), fun = my_func, geom = "line") +
#   stat_summary(data = filter(plot_dta, college == F, tercileExtro == 1), 
#                aes(color = "Both low"), fun = my_func, geom = "point") +
#   stat_summary(data = filter(plot_dta, college == T, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = my_func, geom = "point") +
#   stat_summary(data = filter(plot_dta, college == F, tercileExtro == my_quantile), 
#                aes(color = "High Extra, No College"), fun = my_func, geom = "point") +
#   stat_summary(data = filter(plot_dta, college == T, tercileExtro == 1), 
#                aes(color = "Low Extra, Some College"), fun = my_func, geom = "point") +
#   theme_bw() +
#   labs(x = "Year",
#        y = "Earnings") +
#   scale_y_continuous(breaks = pretty) +
#   scale_color_discrete(name = "Bundle") #labels = c("Both low", "Jocks", "Both high", "Nerds")
# 
# ggsave("/figures/trends_earnings_college.pdf", height = 5, width = 7)
# 
# 
# 
# 
# 
# 
# # COG - EXTRO -------------------------------------------------------------
# 
# 
# 
# plot_dta = full_sample %>%  # %>% filter(inc_labor!=0) %>% 
#   filter(!is.na(extro), !is.na(cog)) %>% 
#   group_by(tercileExtro, tercileCog) %>% 
#   mutate(log_growth = inc_log - median(inc_log[vuosi == 2001], na.rm = T),
#          inc_growth = inc_labor/median(inc_labor[vuosi == 2001], na.rm = T)) %>% ungroup
# 
# 
# p = plot_dta %>% 
#   group_by(vuosi, tercileExtro, tercileCog) %>% 
#   summarise(Earnings = median(inc_labor, na.rm = T),
#             g = median(inc_growth, na.rm = T),
#             n = n()) %>% 
#   group_by(vuosi) %>% 
#   mutate(frac = n/sum(n)) %>% ungroup %>% 
#   mutate(g2 = Earnings/Earnings[vuosi == 2001])
# 
# # Yksinkertainen nelikenttä
# 
# plot_dta %>% 
#   group_by(tercileExtro, tercileCog) %>% 
#   summarise(Earnings = median(inc_labor, na.rm = T),
#             n = n()) %>% 
#   ungroup %>% 
#   mutate(frac = n/sum(n)) 
# 
# 
# # Earnings growth bars plot
# p %>% 
#   unite(bundle, tercileExtro, tercileCog, sep = "", remove = F) %>% 
#   filter(vuosi == 2015,
#          tercileExtro != 2,
#          tercileCog != 2) %>% 
#   ggplot(aes(bundle, g-1)) +
#   geom_bar(stat = "identity", position = position_dodge(width = 0.6), width = 0.5) +
#   labs(x = "Bundle",
#        y = "Earnings Growth 2001 - 2015",
#        fill = "Year") +
#   scale_fill_brewer(palette = "Paired") +
#   theme_bw() +
#   theme(legend.position = "right") +
#   scale_x_discrete(labels = c("Low Extra\n Low Cognitive", "Low Extra\n High Cognitive", "High Extra\n Low Cognitive", "High Extra\n High Cognitive"))
# 
# ggsave("/figures/trends_bars_cog.pdf", height = 4, width = 7)
# 
# # Bundle proportions bar plot
# p %>% 
#   unite(bundle, tercileExtro, tercileCog, sep = "", remove = F) %>% 
#   filter(vuosi %in% c(2001, 2015),
#          tercileExtro != 2,
#          tercileCog != 2) %>% 
#   ggplot(aes(bundle, frac, fill = as.factor(vuosi))) +
#   geom_bar(stat = "identity", position = position_dodge(width = 0.6), width = 0.5) +
#   labs(x = "Bundle",
#        y = "FrExtra",
#        fill = "Year") +
#   scale_fill_brewer(palette = "Paired") +
#   theme_bw() +
#   theme(legend.position = "right") +
#   scale_x_discrete(labels = c("Low Extra\n Low Cognitive", "Low Extra\n High Cognitive", "High Extra\n Low Cognitive", "High Extra\n High Cognitive"))
# 
# ggsave("/figures/trends_bundles_bars_cog.pdf", height = 4, width = 8)
# 
# 
# # Bundle proportions time series
# 
# p %>% 
#   ggplot(aes(vuosi, frac)) + 
#   stat_summary(data = filter(p, tercileCog == my_quantile, tercileExtro == 1), 
#                aes(color = "Low Extra, High Cognitive"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, tercileCog == 1, tercileExtro == my_quantile), 
#                aes(color = "High Extra, Low Cognitive"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, tercileCog == 1, tercileExtro == 1), 
#                aes(color = "Both low"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, tercileCog == my_quantile, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, tercileCog == my_quantile, tercileExtro == 1), 
#                aes(color = "Low Extra, High Cognitive"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, tercileCog == 1, tercileExtro == my_quantile), 
#                aes(color = "High Extra, Low Cognitive"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, tercileCog == 1, tercileExtro == 1), 
#                aes(color = "Both low"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, tercileCog == my_quantile, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = "sum", geom = "point") +
#   theme_bw() +
#   labs(x = "Year",
#        y = "Proportion") +
#   scale_y_continuous(breaks = pretty) +
#   scale_color_discrete(name = "Bundle")
# 
# ggsave("/figures/trends_bundles_cog.pdf", height = 5, width = 7)
# 
# # By synvuosi
# plot_dta = persons %>%  # %>% filter(inc_labor!=0) %>% 
#   filter(!is.na(extro), !is.na(cog)) %>% 
#   group_by(tercileExtro, tercileCog)
# 
# p = plot_dta %>% 
#   group_by(synvuosi, tercileExtro, tercileCog) %>% 
#   summarise(n = n()) %>% 
#   group_by(synvuosi) %>% 
#   mutate(frac = n/sum(n)) %>% ungroup
# 
# p %>% 
#   ggplot(aes(synvuosi, frac)) + 
#   stat_summary(data = filter(p, tercileCog == my_quantile, tercileExtro == 1), 
#                aes(color = "Low Extra, High Cognitive"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, tercileCog == 1, tercileExtro == my_quantile), 
#                aes(color = "High Extra, Low Cognitive"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, tercileCog == 1, tercileExtro == 1), 
#                aes(color = "Both low"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, tercileCog == my_quantile, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = "sum", geom = "line") +
#   stat_summary(data = filter(p, tercileCog == my_quantile, tercileExtro == 1), 
#                aes(color = "Low Extra, High Cognitive"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, tercileCog == 1, tercileExtro == my_quantile), 
#                aes(color = "High Extra, Low Cognitive"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, tercileCog == 1, tercileExtro == 1), 
#                aes(color = "Both low"), fun = "sum", geom = "point") +
#   stat_summary(data = filter(p, tercileCog == my_quantile, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = "sum", geom = "point") +
#   theme_bw() +
#   labs(x = "Year",
#        y = "Proportion") +
#   scale_y_continuous(breaks = pretty) +
#   scale_color_discrete(name = "Bundle")
# 
# # same with just correlations
# plot_dta = persons %>%
#   group_by(synvuosi) %>% 
#   summarise(extro_cog = cor(extro, cog, use = "pairwise.complete.obs"),
#             extro_educ = cor(extro, educ_y, use = "pairwise.complete.obs"),
#             consci_cog = cor(consci, cog, use = "pairwise.complete.obs"),
#             consci_extro = cor(extro, consci, use = "pairwise.complete.obs"))
# plot_dta = plot_dta %>% pivot_longer(cols = -synvuosi)
# plot_dta %>% ggplot(aes(synvuosi, value, color = name)) +
#   geom_line(aes(linetype=name)) +
#   geom_point(aes(shape = name)) +
#   theme_bw()
#   
# 
# # Earnings growth time series
# my_func = "median"
# plot_dta %>% 
#   ggplot(aes(vuosi, inc_growth)) + 
#   stat_summary(data = filter(plot_dta, tercileCog == 1, tercileExtro == 1), 
#                aes(color = "Both low"), fun = my_func, geom = "line") +
#   stat_summary(data = filter(plot_dta, tercileCog == 3, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = my_func, geom = "line") +
#   stat_summary(data = filter(plot_dta, tercileCog == 1, tercileExtro == my_quantile), 
#                aes(color = "High Extra, Low Cognitive"), fun = my_func, geom = "line") +
#   stat_summary(data = filter(plot_dta, tercileCog == 3, tercileExtro == 1), 
#                aes(color = "Low Extra, High Cognitive"), fun = my_func, geom = "line") +
#   stat_summary(data = filter(plot_dta, tercileCog == 1, tercileExtro == 1), 
#                aes(color = "Both low"), fun = my_func, geom = "point") +
#   stat_summary(data = filter(plot_dta, tercileCog == 3, tercileExtro == my_quantile), 
#                aes(color = "Both high"), fun = my_func, geom = "point") +
#   stat_summary(data = filter(plot_dta, tercileCog == 1, tercileExtro == my_quantile), 
#                aes(color = "High Extra, Low Cognitive"), fun = my_func, geom = "point") +
#   stat_summary(data = filter(plot_dta, tercileCog == 3, tercileExtro == 1), 
#                aes(color = "Low Extra, High Cognitive"), fun = my_func, geom = "point") +
#   theme_bw() +
#   labs(x = "Year",
#        y = "Earnings") +
#   scale_y_continuous(breaks = pretty) +
#   scale_x_continuous(breaks = pretty) +
#   scale_color_discrete(name = "Bundle") #labels = c("Both low", "Jocks", "Both high", "Nerds")
# 
# ggsave("/figures/trends_earnings_cog.pdf", height = 5, width = 7)

